load data

geno <- read_delim("tables/TableS1_genotypes.tsv")
pheno <- read_delim("tables/TableS2_phenotypes_metadata.tsv")
cipro_antibiogram <- full_join(geno,pheno)

set colours

wt_colours <- c(`non-wt (I/R)`="IndianRed", `wt (S)`= "LightBlue")
res_colours <- c("I"="grey", "R"="IndianRed", "S"="LightBlue", "NWT"="grey")
res_colours2 <- c("I"="black", "R"="IndianRed", "S"="LightBlue", "NWT"="black")
res_colours3 <- c("NWT I"="#78638a", "NWT R"="IndianRed", "WT S"="LightBlue")
# position dodge for coefficient plots
pd <- position_dodge(width=0.8)

function to tabulate estimate, 95% CI, p-value from logistic regression model

logreg_details <- function(model) {
  model_summary <- cbind(est=model$coefficients, ci.lower=model$ci.lower, ci.upper=model$ci.upper, pval=model$prob) %>%
    as_tibble(rownames="Determinant")
  return(model_summary)
}

function to tabulate estimate, 95% CI, p-value from linear regression model

linreg_details <- function(model) {
  ci <- confint(model) %>% as_tibble(rownames="Determinant") %>%
    rename(ci.lower=`2.5 %`, ci.upper=`97.5 %`)
  model_summary <- summary(model)$coef %>% 
    as_tibble(rownames="Determinant") %>% 
    rename(est=Estimate, pval=`Pr(>|t|)`) %>% 
    select(Determinant, est, pval) %>% left_join(ci)
  return(model_summary)
}

functions to calculate PPV and categorical agreement from models

cat_agree <- function(pred,truth) {
  xtabs <- table(pred,factor(truth, levels = 0:1))
  return((xtabs[1,1]+xtabs[2,2])/sum(xtabs))
}

ppv <- function(pred,truth) {
  xtabs <- table(pred,truth)
  return(xtabs[2,2]/sum(xtabs[2,]))
}

npv <- function(pred,truth) {
  xtabs <- table(pred,truth)
  return(xtabs[1,1]/sum(xtabs[1,]))
}

#sens <- function(pred,truth) {
#  xtabs <- table(pred,truth)
#  return(xtabs[2,2]/(sum(xtabs[,2])))
#}

sens <- function(pred,truth) {
  xtabs <- table(pred,factor(truth, levels = 0:1))
  return(xtabs[2,2]/(sum(xtabs[,2])))
}

spec <- function(pred,truth) {
  xtabs <- table(pred,truth)
  return(xtabs[1,1]/(sum(xtabs[,1])))
}

me <- function(pred,truth) {
  xtabs <- table(pred,truth)
  return(xtabs[2,1]/sum(xtabs[,1]))
}

vme <- function(pred,truth) {
  xtabs <- table(pred,factor(truth, levels = 0:1))
  return(xtabs[1,2]/sum(xtabs[,2]))
}

metrics <- function(pred,truth) {
  xtabs <- table(pred,truth)
  return(list(cat=cat_agree(pred,truth),
              ppv=ppv(pred,truth),
              npv=npv(pred,truth),
              sens=sens(pred,truth),
              spec=spec(pred,truth),
              me=me(pred,truth),
              vme=vme(pred,truth),
              n=sum(xtabs), 
              summary=c("cat"=cat_agree(pred,truth),
                    "sens"=sens(pred,truth),
                    "spec"=spec(pred,truth),
                    "me"=me(pred,truth),
                    "vme"=vme(pred,truth),
                    "n"=sum(xtabs)
              )
          )
    )
}

Logistic regression models

Determinants vs R

# prepare matrix for modelling
R_vs_dets <- cipro_antibiogram %>% 
  select(`GyrA-83F`:`qnrVC6`, aac6, resistant) %>%
  rename(`aac(6')-Ib-cr`=aac6) %>%
  select(-c(ends_with('?'), ends_with('*'))) # remove imprecise matches

# exclude genomes with rare variants
R_vs_dets <- R_vs_dets[,colSums(R_vs_dets) >= 20]

# model R vs all common variants (N>=20) = Model 1a
model_R_indiv <- logistf(resistant ~ ., data=R_vs_dets)

# model R vs all common variants (N>=20), each with aac6 interaction = Model 1b
model_R_indiv_aac6Interaction <- logistf(resistant ~ .*`aac(6')-Ib-cr`, data=R_vs_dets)

Summarise/plot coefficients per determinant - R models

# summarise model output
logreg_model1a <- logreg_details(model_R_indiv) %>% mutate(interaction=NA) %>% mutate(model="a") %>% relocate(interaction, .after=Determinant) %>%
    mutate(Determinant=gsub("`","",Determinant)) 
logreg_model1b <- logreg_details(model_R_indiv_aac6Interaction) %>% 
  rename(Term=Determinant) %>%
  separate_wider_delim(Term, ":", names=c("Determinant","interaction"), too_few="align_start", cols_remove=F) %>% 
  mutate(model="b") %>%
  mutate(Determinant=gsub("`","",Determinant))  %>%
  mutate(Term=gsub("`","",Term))  %>%
  mutate(interaction=gsub("`","",interaction)) 

# all individual determinants, plus significant positive interactions
log_reg_R_plot <- logreg_model1a %>% bind_rows(logreg_model1b) %>%
  filter((pval<0.05 & est>0) | is.na(interaction)) %>%
  mutate(interaction=factor(replace(interaction, is.na(interaction), "none"), levels=c("none", "aac(6')-Ib-cr"))) %>%
  filter(Determinant != "(Intercept)") %>%
  ggplot(aes(y=Determinant)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper, group=model, col=model, linetype=interaction), position=pd) +
  geom_point(aes(x=est, group=model, col=model), position=pd) + 
  scale_colour_manual(values=c(a="#f1933b", b="#bd1515")) +
  theme_bw() + labs(x="Regression coefficient", col="R Model", linetype="Interaction")

log_reg_R_plot

Determinants vs NWT

# prepare matrix for modelling
NWT_vs_dets <- cipro_antibiogram %>% 
  select(`GyrA-83F`:`qnrVC6`, aac6, nonWT_binary) %>%
  rename(`aac(6')-Ib-cr`=aac6) %>%
  select(-c(ends_with('?'), ends_with('*'))) # remove imprecise matches

# exclude genomes with rare variants
NWT_vs_dets <- NWT_vs_dets[,colSums(NWT_vs_dets) >= 20]

# model NWT vs all common variants (N>=20) = Model 3a
model_NWT_indiv <- logistf(nonWT_binary ~ ., data=NWT_vs_dets)

# model NWT vs all common variants (N>=20), each with aac6 interaction = Model 3b
model_NWT_indiv_aac6Interaction <- logistf(nonWT_binary ~ .*`aac(6')-Ib-cr`, data=NWT_vs_dets)

Summarise/plot coefficients per determinant - NWT models

# summarise model output
logreg_model3a <- logreg_details(model_NWT_indiv) %>% mutate(interaction=NA) %>% mutate(model="a") %>% relocate(interaction, .after=Determinant) %>%
    mutate(Determinant=gsub("`","",Determinant)) 
logreg_model3b <- logreg_details(model_NWT_indiv_aac6Interaction) %>% 
  rename(Term=Determinant) %>%
  separate_wider_delim(Term, ":", names=c("Determinant","interaction"), too_few="align_start", cols_remove=F) %>% 
  mutate(model="b") %>%
  mutate(Determinant=gsub("`","",Determinant))  %>%
  mutate(Term=gsub("`","",Term))  %>%
  mutate(interaction=gsub("`","",interaction)) 

# get count per marker to label on the right
marker_count <- cipro_antibiogram %>% 
  rename(`aac(6')-Ib-cr`=aac6) %>%
  select(`GyrA-83F`:`qnrVC6`, `aac(6')-Ib-cr`) %>%
  colSums()

marker_count_order <- logreg_model3a %>% bind_rows(logreg_model3b) %>%
    filter((pval<0.05 & est>0) | is.na(interaction)) %>%
    filter(Determinant != "(Intercept)") %>% pull(Determinant) %>% unique() %>% sort()

log_reg_NWT_plot <- logreg_model3a %>% bind_rows(logreg_model3b) %>%
  filter((pval<0.05 & est>0) | is.na(interaction)) %>%
  mutate(interaction=factor(replace(interaction, is.na(interaction), "none"), levels=c("none", "aac(6')-Ib-cr"))) %>%
  filter(Determinant != "(Intercept)") %>%
  ggplot(aes(y=Determinant)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper, group=model, col=model, linetype=interaction), position=pd) +
  geom_point(aes(x=est, group=model, col=model), position=pd) + 
  scale_colour_manual(values=c(a="#70bfef", b="#2c15ae")) +
  theme_bw() + labs(x="Regression coefficient", col="NWT Model", linetype="Interaction", y="") + 
  scale_y_discrete(labels=paste0("n=",marker_count[marker_count_order]), position="right")

log_reg_NWT_plot

TableS6 - write out logistic regression coefficients

multiVariable_vs_singleVariable <- logreg_model1a %>% select(-c(interaction, model)) %>%
  setNames(paste0('R Model1a.', names(.))) %>% rename(Term='R Model1a.Determinant') %>% 
  full_join(logreg_model1b %>% select(-c(Determinant, interaction, model)) %>% setNames(paste0('R Model1b.', names(.)))%>% rename(Term='R Model1b.Term'), by="Term") %>% 
  full_join(logreg_model3a %>% select(-c(interaction, model)) %>% setNames(paste0('NWT Model1a.', names(.))) %>% rename(Term='NWT Model1a.Determinant'), by="Term") %>% 
  full_join(logreg_model3b %>% select(-c(Determinant, interaction, model)) %>% setNames(paste0('NWT Model1b.', names(.)))%>% rename(Term='NWT Model1b.Term'), by="Term")

write_csv(multiVariable_vs_singleVariable, file="tables/TableS6_LogRegCoef.csv")

read in Table 1, solo PPVs

ppv_solo <- read_csv("tables/TableS5_determinantStats.csv") %>% filter(!is.na(N.solo)) %>% select(Determinant, ends_with(".solo"))

ppvR <- ppv_solo %>% mutate(p=R.solo/N.solo, se=sqrt(p*(1-p)/N.solo), ci.lower=p-1.96*se, ci.upper=p+1.96*se) %>% select(Determinant, p, ci.lower, ci.upper) %>% mutate(category="R") 

ppvNWT <- ppv_solo %>% mutate(p=NWT.solo/N.solo, se=sqrt(p*(1-p)/N.solo), ci.lower=p-1.96*se, ci.upper=p+1.96*se) %>% select(Determinant, p, ci.lower, ci.upper, N.solo) %>% mutate(category="NWT")

ppvR_plot <- ppvR %>% 
  ggplot(aes(y=Determinant)) +
  geom_vline(xintercept=50, linetype=2) +
  geom_linerange(aes(xmin=ci.lower*100, xmax=ci.upper*100), col="#bd1515", position=pd) +
  geom_point(aes(x=p*100, group=category), col="#bd1515", position=pd) + 
  theme_bw() + labs(x="PPV (%)")

ppvNWT_plot <- ppvNWT %>% 
  ggplot(aes(y=Determinant)) +
  geom_vline(xintercept=50, linetype=2) +
  geom_linerange(aes(xmin=ci.lower*100, xmax=ci.upper*100), col="#2c15ae", position=pd) +
  geom_point(aes(x=p*100, group=category), col="#2c15ae", position=pd) + 
  theme_bw() + labs(x="PPV (%)", y="") + 
  scale_y_discrete(labels=paste0("n=",ppvNWT$N.solo), position="right")

Figure 1 - PPV and regression model coefficients, for individual determinants

(ppvR_plot + ggtitle(label="a) PPV for solo marker", subtitle="R") + ppvNWT_plot + ggtitle(label="", subtitle="NWT") + plot_layout(guides="collect", axes="collect")) / (log_reg_R_plot + ggtitle(label="b) Logistic regression", subtitle="R") + log_reg_NWT_plot + ggtitle(label="", subtitle="NWT") + plot_layout(guides="collect", axes="collect"))

ggsave(width=8, height=9, file="figs/Fig1_individual_variable_stats.pdf")
ggsave(width=8, height=9, file="figs/Fig1_individual_variable_stats.png")

Model performance

model predictions - individual determinants

# R models
model_R_indiv_pred <- predict(model_R_indiv, type="response")
model_R_indiv_aac6Interaction_pred <- predict(model_R_indiv_aac6Interaction, type="response")

# NWT models
model_NWT_indiv_pred <- predict(model_NWT_indiv, type="response")
model_NWT_indiv_aac6Interaction_pred <- predict(model_NWT_indiv_aac6Interaction, type="response")

Logistic regression at the counts level - R, NWT

# R models
model_R_groups <- logistf(resistant ~ QRDR_mutations + acquired_genes + aac6, data=cipro_antibiogram)
model_R_groups_selectInteract <- logistf(resistant ~ QRDR_mutations*aac6 + acquired_genes*aac6, data=cipro_antibiogram)

# NWT models
model_NWT_groups <- logistf(nonWT_binary ~ QRDR_mutations + acquired_genes + aac6, data=cipro_antibiogram)
model_NWT_groups_selectInteract <- logistf(nonWT_binary ~ QRDR_mutations*aac6 + acquired_genes*aac6, data=cipro_antibiogram)

# R model predictions
model_R_groups_pred <- predict(model_R_groups, type="response")
model_R_groups_selectInteract_pred <- predict(model_R_groups_selectInteract, type="response")

# NWT model predictions
model_NWT_groups_pred <- predict(model_NWT_groups, type="response")
model_NWT_groups_selectInteract_pred <- predict(model_NWT_groups_selectInteract, type="response")
count_model_summary <- logreg_details(model_R_groups) %>% mutate(Model="2a", response="R") %>%
  bind_rows(logreg_details(model_R_groups_selectInteract) %>% mutate(Model="2b", response="R")) %>% 
  bind_rows(logreg_details(model_NWT_groups) %>% mutate(Model="2a", response="NWT")) %>%
  bind_rows(logreg_details(model_NWT_groups_selectInteract) %>% mutate(Model="2b", response="NWT")) %>%
  relocate(Model, .before=Determinant) %>% relocate(response, .before=Model) %>%
  write_csv(file="tables/TableS7_CountLogRegCoef.csv")

Table 1 - model summary table

aic <- c(extractAIC(model_R_indiv)[2], 
         extractAIC(model_R_indiv_aac6Interaction)[2], 
         extractAIC(model_R_groups)[2], 
         extractAIC(model_R_groups_selectInteract)[2], 
         extractAIC(model_NWT_indiv)[2], 
         extractAIC(model_NWT_indiv_aac6Interaction)[2], 
         extractAIC(model_NWT_groups)[2], 
         extractAIC(model_NWT_groups_selectInteract)[2])

anova_result <- c("-",anova(model_R_indiv, model_R_indiv_aac6Interaction)$pval,
                  "-",anova(model_R_groups, model_R_groups_selectInteract)$pval,
                  "-",anova(model_NWT_indiv, model_NWT_indiv_aac6Interaction)$pval, 
                  "-", anova(model_NWT_groups, model_NWT_groups_selectInteract)$pval)

metrics_result <- rbind(
  metrics(model_R_indiv_pred>0.5, R_vs_dets$resistant)$summary,
  metrics(model_R_indiv_aac6Interaction_pred>0.5, R_vs_dets$resistant)$summary,
  metrics(model_R_groups_pred>0.5, cipro_antibiogram$resistant)$summary,
  metrics(model_R_groups_selectInteract_pred>0.5, R_vs_dets$resistant)$summary,
  metrics(model_NWT_indiv_pred>0.5, NWT_vs_dets$nonWT_binary)$summary,
  metrics(model_NWT_indiv_aac6Interaction_pred>0.5, NWT_vs_dets$nonWT_binary)$summary,
  metrics(model_NWT_groups_pred>0.5, cipro_antibiogram$nonWT_binary)$summary,
  metrics(model_NWT_groups_selectInteract_pred>0.5, cipro_antibiogram$nonWT_binary)$summary
)

model_summary_tab <- cbind(aic, anova_result, metrics_result) %>% as_tibble() %>%
  rename(Sensitivity=sens, Specificity=spec, ME=me, VME=vme, `Categorical agreement`=cat) %>%
  rename(`model fit (aac6 interaction)`=anova_result) %>% 
  rename(AIC=aic) %>%
  mutate(ModelName=c("Model 1a", "Model 1b", "Model 2a", "Model 2b", "Model 1a", "Model 1b", "Model 2a", "Model 2b")) %>%
  mutate(response = c(rep("R",4), rep("NWT",4))) %>%
  mutate(predictors = rep(c("determinants", "determinant*aac6", "QRDR count + PMQR count + aac6","QRDR count*aac6 + PMQR count*aac6"),2))

Rules-based classifier

cipro_antibiogram <- cipro_antibiogram %>% 
  mutate(QRDR_mutations_exclude = str_count(Flq_mutations, "GyrA-87G") + str_count(Flq_mutations, "GyrA-87H")) %>% 
  mutate(PMQR_exclude = str_count(Flq_acquired, "qnrB1.v1") + str_count(Flq_acquired, "qnrB1.v2")) %>%
  mutate(rules_category = case_when(
    QRDR_mutations==0 & acquired_genes==0 & aac6==0 ~ 0, # no determinants -> WT S
    QRDR_mutations==1 & acquired_genes==0 & aac6==0 & QRDR_mutations_exclude==1 ~ 0, # non-associated QRDR solo -> WT S
    QRDR_mutations==0 & acquired_genes==0 & aac6==1 ~ 1, # aac6 solo -> WT S
    QRDR_mutations==0 & acquired_genes==1 & aac6==0 & PMQR_exclude==1 ~ 2, # non-associated PMQR solo -> NWT I
    QRDR_mutations==1 & acquired_genes==0 & aac6==0 & QRDR_mutations_exclude==0 ~ 3, # 1 QRDR, except those not associated -> NWT R
    QRDR_mutations==1 & acquired_genes==0 & aac6==1 ~ 4, # 1 QRDR + aac6 -> NWT R
    QRDR_mutations>=2 & acquired_genes==0 ~ 5, # ≥2 QRDR -> NWT R
    QRDR_mutations==0 & acquired_genes==1 & aac6==0 & PMQR_exclude==0 ~ 6, # 1 PMQR, except those not associated -> NWT R
    QRDR_mutations==0 & acquired_genes==1 & aac6==1 ~ 7, # 1 PMQR + aac6
    QRDR_mutations==0 & acquired_genes>=2 ~ 8, # ≥2 PMQR -> NWT R
    QRDR_mutations>=1 & acquired_genes>=1 ~ 9, # QRDR + PMQR -> NWT R
    TRUE ~ NA
  ))

# check all are assigned
table(cipro_antibiogram$rules_category)
## 
##    0    1    2    3    4    5    6    7    8    9 
## 5680  161  160  103   23 2167  546  819   68 2440
table(is.na(cipro_antibiogram$rules_category))
## 
## FALSE 
## 12167
cipro_antibiogram %>% filter(is.na(rules_category)) %>% select(Flq_mutations, Flq_acquired)
# assign S/R predictions to categories
cipro_antibiogram <- cipro_antibiogram %>% 
  mutate(predSIR = if_else(rules_category<=2, "S", "R")) %>%
  mutate(predSIR = replace(predSIR, rules_category==2, "I")) %>%
  mutate(predSIR = factor(predSIR, levels=c("S", "I", "R"))) %>%
  mutate(predR = if_else(rules_category<=2, "S", "R")) %>%
  mutate(predR = factor(predR, levels=c("S", "R"))) %>%
  mutate(predNWT = if_else(rules_category<2, "WT", "NWT")) %>%
  mutate(predNWT = factor(predNWT, levels=c("WT", "NWT")))

table(cipro_antibiogram$rules_category, cipro_antibiogram$predR)
##    
##        S    R
##   0 5680    0
##   1  161    0
##   2  160    0
##   3    0  103
##   4    0   23
##   5    0 2167
##   6    0  546
##   7    0  819
##   8    0   68
##   9    0 2440
table(cipro_antibiogram$rules_category, cipro_antibiogram$predSIR)
##    
##        S    I    R
##   0 5680    0    0
##   1  161    0    0
##   2    0  160    0
##   3    0    0  103
##   4    0    0   23
##   5    0    0 2167
##   6    0    0  546
##   7    0    0  819
##   8    0    0   68
##   9    0    0 2440
table(cipro_antibiogram$rules_category, cipro_antibiogram$predNWT)
##    
##       WT  NWT
##   0 5680    0
##   1  161    0
##   2    0  160
##   3    0  103
##   4    0   23
##   5    0 2167
##   6    0  546
##   7    0  819
##   8    0   68
##   9    0 2440

Table 1b and numbers for text - classifier overall metrics, by species

species_R <- cipro_antibiogram %>% group_by(species) %>% 
  filter(species != "Klebsiella variicola subsp. tropica") %>%
  filter(species != "Klebsiella quasivariicola") %>%
  filter(species != "Klebsiella africana") %>%
  summarise(cat=cat_agree(predR, resistant), 
            sens=sens(predR, resistant), 
            spec=spec(predR, resistant), 
            me=me(predR, resistant), 
            vme=vme(predR, resistant),
            n=n())%>%
  mutate(out=rep("R", 4)) %>% 
  relocate(out, .before=species)

species_NWT <- cipro_antibiogram %>% group_by(species) %>% 
  filter(species != "Klebsiella variicola subsp. tropica") %>%
  filter(species != "Klebsiella quasivariicola") %>%
  filter(species != "Klebsiella africana") %>%
  summarise(cat=cat_agree(predNWT, nonWT_binary), 
            sens=sens(predNWT, nonWT_binary), 
            spec=spec(predNWT, nonWT_binary), 
            me=me(predNWT, nonWT_binary), 
            vme=vme(predNWT, nonWT_binary),
            n=n()) %>%
  mutate(out=rep("NWT", 4)) %>% 
  relocate(out, .before=species)

bind_rows(c("out"="R","species"="Species complex", 
            metrics(cipro_antibiogram$predR, cipro_antibiogram$resistant)$summary), 
          c("out"="NWT","species"="Species complex",
            metrics(cipro_antibiogram$predNWT, cipro_antibiogram$nonWT_binary)$summary)
          ) %>%
  mutate_at(c("cat","sens","spec","me","vme", "n"), as.double) %>%
  bind_rows(species_R) %>%
  bind_rows(species_NWT) %>%
  rename(Sensitivity=sens, Specificity=spec, ME=me, VME=vme, `Categorical agreement`=cat, N=n) %>%
  mutate(Sensitivity=paste0(round(as.numeric(Sensitivity)*100,2),"%")) %>%
  mutate(Specificity=paste0(round(as.numeric(Specificity)*100,2),"%")) %>%
  mutate(ME=paste0(round(as.numeric(ME)*100,2),"%")) %>%
  mutate(VME=paste0(round(as.numeric(VME)*100,2),"%")) %>%
  mutate(`Categorical agreement`=paste0(round(as.numeric(`Categorical agreement`)*100,2),"%")) %>%
  select(out, species, `Categorical agreement`, Sensitivity, Specificity, ME, VME, N) %>%
  write_csv("tables/Table1b_ModelSummary.csv", na="-")

Figure 5 - classifier overall metrics, by ST, Country, Source Type, Infection status

ST_R <- cipro_antibiogram %>% group_by(ST) %>% 
  summarise(sens=sens(predR, resistant)*100, 
            spec=spec(predR, resistant)*100, 
            cat=cat_agree(predR, resistant)*100, 
            me=me(predR, resistant)*100, 
            vme=vme(predR, resistant)*100, 
            n=n(),
            percent_R=(sum(resistant))/n*100) %>%
  mutate(out=rep("R", 1992)) %>% 
  mutate(category=rep("ST", 1992)) %>% 
  relocate(out, category, .before=ST)%>%
  rename(category_name=ST)
Country_R <- cipro_antibiogram %>% group_by(Country) %>% 
  summarise(sens=sens(predR, resistant)*100, 
            spec=spec(predR, resistant)*100, 
            cat=cat_agree(predR, resistant)*100, 
            me=me(predR, resistant)*100, 
            vme=vme(predR, resistant)*100, 
            n=n(),
            percent_R=(sum(resistant))/n*100) %>%
  mutate(out=rep("R", 27)) %>% 
  mutate(category=rep("Country", 27)) %>%   
  relocate(out, category, .before=Country)%>%
  rename(category_name=Country)
SourceType_R <- cipro_antibiogram %>% group_by(Source.Type) %>% 
  summarise(sens=sens(predR, resistant)*100, 
            spec=spec(predR, resistant)*100, 
            cat=cat_agree(predR, resistant)*100, 
            me=me(predR, resistant)*100, 
            vme=vme(predR, resistant)*100, 
            n=n(),
            percent_R=(sum(resistant))/n*100) %>%
  mutate(out=rep("R", 6)) %>% 
  mutate(category=rep("Source", 6)) %>%  
  relocate(out, category, .before=Source.Type)%>%
  rename(category_name=Source.Type)
cipro_antibiogram$Infection.status <- cipro_antibiogram$Infection.status %>% replace_na("Unknown")

InfectionStatus_R <- cipro_antibiogram %>% group_by(Infection.status) %>% 
  summarise(sens=sens(predR, resistant)*100, 
            spec=spec(predR, resistant)*100, 
            cat=cat_agree(predR, resistant)*100, 
            me=me(predR, resistant)*100, 
            vme=vme(predR, resistant)*100, 
            n=n(),
            percent_R=(sum(resistant))/n*100) %>%
  mutate(out=rep("R", 3)) %>% 
  mutate(category=rep("Inf Status", 3)) %>% 
  relocate(out, category, .before=Infection.status) %>%
  rename(category_name=Infection.status)
Combined_R <- bind_rows(ST_R, Country_R, SourceType_R, InfectionStatus_R)

Combined_R <- Combined_R %>%
  rename(Sensitivity=sens, Specificity=spec, ME=me, VME=vme, `Categorical agreement`=cat, N=n, `Resistant`=percent_R, Category=category, Prediction=out, `Category Name`=category_name) %>%
  write_csv("tables/TableSX_ModelSummary_Combined.csv", na="-")

#filter for categories of genomes with >99 genomes
Combined_R_100 <- Combined_R %>% 
  filter(Resistant<100) %>%
  filter(N>40) %>%
  mutate(Category_label=paste(`Category Name`, " (n=", N, ")", sep = "")) %>%
  select(Category, Category_label, N, Sensitivity, Specificity, `Resistant`)

# convert table from wide to long for plotting 
Combined_R_100_long<-gather(Combined_R_100, `Evaluation metric`, value, Sensitivity:`Resistant`)
Combined_R_100_long <- Combined_R_100_long[order(Combined_R_100_long$N, decreasing = TRUE),]


Combined_R_100_plot <-ggplot(Combined_R_100_long, aes(as.numeric(value),fct_inorder(as.factor(Category_label)))) +
  geom_point(aes(shape = `Evaluation metric`, size=`Evaluation metric`, colour=`Evaluation metric`), alpha=0.5) +
  scale_shape_manual(values = c(Sensitivity=15, Specificity=18, Resistant=3)) +
  scale_size_manual(values = c(Sensitivity=3, Specificity=3, Resistant=1)) +
  scale_colour_manual(values=c(Sensitivity="#8eb567", Specificity="#9a98ea", Resistant="IndianRed")) +
  scale_y_discrete(limits=rev)+
  facet_grid(Category~., scales = "free", space='free')+ 
  labs(y="", x = "%") + 
  theme_bw()+
  theme(legend.title=element_blank())

Combined_R_100_plot

ggsave(height=7, width=6, file="figs/Fig5_classifierPerformance_category.png")
ggsave(height=7, width=6, file="figs/Fig5_classifierPerformance_category.pdf")

classifier - per genotype profile PPV

category_phenotype_table <- cipro_antibiogram %>% 
  group_by(rules_category, resistant) %>% 
  count() %>% 
  ungroup() %>%
  pivot_wider(names_from=resistant, values_from=n, values_fill=0) %>%
  rename(SI=`0`, R=`1`)

category_phenotype_table <- cipro_antibiogram %>% 
  group_by(rules_category, WT_vs_nonWT) %>% 
  count() %>% 
  ungroup() %>%
  pivot_wider(names_from=WT_vs_nonWT, values_from=n, values_fill=0) %>%
  rename(NWT=`non-wt (I/R)`, WT=`wt (S)`) %>%
  full_join(category_phenotype_table)

category_phenotype_table <- category_phenotype_table %>% 
  mutate(N=NWT+WT) %>%
  mutate(`R PPV`=paste0(round(R/N*100,2), "% (N=", R,"/",N,")")) %>%
  mutate(`NWT PPV`=paste0(round(NWT/N*100,2), "% (N=", NWT,"/",N,")"))

category_phenotype_table

Table 2 - summarise classifier by genotype profile

predR_summary <- cipro_antibiogram %>% 
  group_by(predR, resistant) %>% 
  count() %>% 
  ungroup() %>%
  pivot_wider(names_from=resistant, values_from=n, values_fill=0) %>%
  rename(SI=`0`, R=`1`) %>%
  mutate(N=SI+R) %>%
  mutate(PPV = R/N) 

predNWT_summary <- cipro_antibiogram %>% 
  group_by(predNWT, WT_vs_nonWT) %>% 
  count() %>% 
  ungroup() %>%
  pivot_wider(names_from=WT_vs_nonWT, values_from=n, values_fill=0) %>%
  mutate(N=`non-wt (I/R)`+`wt (S)`) %>%
  mutate(PPV = `non-wt (I/R)`/N) 

category_phenotype_table %>% 
  mutate(QRDR=c("0^", "0", "0", "1", "1", ">1", "0", "0", "0", ">0")) %>%
  mutate(PMQR=c("0", "0", "qnrB1", "0", "0", "0", "1^", "1", ">1", ">0")) %>%
  mutate(aac6=c("0", "1", "0", "0", "1", "*", "0", "1", "*", "*")) %>%
  mutate(Prediction=c("WT S", "WT S", "NWT I", rep("NWT R", 7))) %>%
  mutate(N=as.character(N)) %>%
  select(QRDR, PMQR, aac6, Prediction, N, `R PPV`, `NWT PPV`) %>%
  bind_rows(c(QRDR="Overall", PMQR="", aac6="", Prediction="-", N=as.character(nrow(cipro_antibiogram)), 
              `R PPV`=paste0(round(predR_summary[2, "PPV"]*100,2),"% (N=", predR_summary[2,"R"], "/", predR_summary[2,"N"],")"),
              `NWT PPV`=paste0(round(predNWT_summary[2, "PPV"]*100,2),"% (N=", predNWT_summary[2,"non-wt (I/R)"], "/", predNWT_summary[2,"N"],")")
              )) %>%
  write_csv(file="tables/Table2_Classifier.csv")

write(x="\n^=GyrA-87G and GyrA-87H are not included in QRDR count; qnrB1 is excluded from the single-PMQR count. *aac6 presence is not considered", file="tables/Table2_Classifier.csv", append=T)

Fig 4 - rules prediction plots

# MIC distribution R vs. S
rules_MIC_distributionRS <- cipro_antibiogram %>%
  group_by(Laboratory.Typing.Method) %>%
  filter(Laboratory.Typing.Method != "Disk diffusion") %>% 
  mutate(predSIR_label=case_when(predSIR=="S" ~ "WT S", predSIR=="R" ~ "NWT R", TRUE ~ "NWT I")) %>%
  mutate(predSIR_label=factor(predSIR_label, levels=c("WT S", "NWT I", "NWT R"))) %>%
  ggplot(aes(as.factor(round(as.numeric(Measurement), digits=2)), fill = predSIR_label)) + geom_bar() + 
  labs(title = "A) MIC, by classifer prediction", y = "Number of genomes", x="MIC (mg/L)", 
       linetype = "MIC breakpoints", fill="Classifier prediction") +
  # round off x-axis labels to 2dp
  scale_x_discrete(labels = function(x) round(as.numeric(x), digits = 2)) +
  scale_fill_manual(values = res_colours3)+
  geom_vline(aes(xintercept = 5.5), color = "black") +
  annotate("text", x=5.75, y=2900, label="R", size=2.5)+
  geom_vline(aes(xintercept = 4.5), color = "black") +
  annotate("text", x=4.25, y=2900, label="S", size=2.5)+
  theme_minimal()+
  theme(axis.text.x = element_text(angle = 45, hjust=1, size=11))

rules_MIC_distributionRS

# DD distribution
rules_DD_distributionRS <- cipro_antibiogram %>%
  group_by(Laboratory.Typing.Method) %>%
  filter(Laboratory.Typing.Method == "Disk diffusion") %>%
  mutate(predSIR_label=case_when(predSIR=="S" ~ "WT S", predSIR=="R" ~ "NWT R", TRUE ~ "NWT I")) %>%
  mutate(predSIR_label=factor(predSIR_label, levels=c("WT S", "NWT I", "NWT R"))) %>%
  ggplot(aes(as.numeric(Measurement), fill = predSIR_label)) + geom_bar() + 
  labs(title = "B) Disk zones, by classifer prediction", y = "Number of genomes", x ="Disk zone (mm)",
       linetype = "Disk diffusion \nbreakpoints") +
        scale_fill_manual(values = res_colours3)+
  geom_vline(aes(xintercept = 21.5), color = "black") +
  annotate("text", x=20.5, y=400, label="R", size=2.5)+
  geom_vline(aes(xintercept = 24.5), color = "black") +
  #geom_vline(aes(xintercept = 25.5), color = "black", linetype=2) +
  annotate("text", x=25.5, y=400, label="S", size=2.5)+
  theme_minimal() + 
  theme(axis.text.x = element_text(size=11))+
  theme(legend.position = "none")

rules_DD_distributionRS

Figure 4 - classifier predictions vs measurements

rules_MIC_distributionRS / rules_DD_distributionRS

ggsave(height=5, width=6, file="figs/Fig4_classifierPred_MIC_DD_distribution.png")

ggsave(height=5, width=6, file="figs/Fig4_classifierPred_MIC_DD_distribution.pdf")

Fig S9 MIC/DD distribution by genotype profile

genotype profile labels and colours

profile_labels <- paste(
  paste0(c("0^", "0", "0", "1", "1", ">1", "0", "0", "0", ">0"), rep(" QRDR,", 10)),
  c("0 PMQR,", "0 PMQR,", "qnrB1,", "0 PMQR,", "0 PMQR,", "0 PMQR", "1 PMQR^,", "1 PMQR,", ">1 PMQR", ">0 PMQR"),
  c("aac6-", "aac6+", "aac6-", "aac6-", "aac6+", "", "aac6-", "aac6+", "", "")
)
names(profile_labels)<-0:9

profile_colours <- c("0" = "#9DDFFE", "1"="#1A83B6", "2"="#98d3a4", "3"="#fbe8a5", "4"="#ecb16f", "5"="#f17c1b", "6"="#fdc0c0", "7"="#e88f8f", "8"="#b92020", "9"="#59124b")

MIC distribution

rules_MIC_distribution <- cipro_antibiogram %>%
  group_by(Laboratory.Typing.Method) %>%
  filter(Laboratory.Typing.Method != "Disk diffusion") %>% 
  ggplot(aes(x = as.factor(round(as.numeric(Measurement), digits=2)), fill = as.factor(rules_category))) + geom_bar() + 
  labs(x = "MIC (mg/L)", y = "Number of genomes", 
       linetype = "MIC breakpoints", title="A) MIC, by genotype profile", fill="Genotype profile") +
  scale_fill_manual(values=profile_colours, labels=profile_labels)+
  # round off x-axis labels to 2dp
  scale_x_discrete(labels = function(x) round(as.numeric(x), digits = 2)) +
  geom_vline(aes(xintercept = 5.5), color = "black") +
  annotate("text", x=5.75, y=2900, label="R", size=2.5)+
  geom_vline(aes(xintercept = 4.5), color = "black") +
  annotate("text", x=4.25, y=2900, label="S", size=2.5)+
  theme_minimal()+
  theme(axis.text.x = element_text(angle = 45, hjust=1, size=11))

rules_MIC_distribution

# DD distribution
rules_DD_distribution <- cipro_antibiogram %>%
  group_by(Laboratory.Typing.Method) %>%
  filter(Laboratory.Typing.Method == "Disk diffusion") %>%
  ggplot(aes(as.numeric(Measurement), fill = as.factor(rules_category))) + geom_bar() + 
  scale_fill_manual(values=profile_colours, labels=profile_labels)+
  geom_vline(aes(xintercept = 21.5), color = "black") +
  annotate("text", x=20.5, y=400, label="R", size=2.5)+
  geom_vline(aes(xintercept = 24.5), color = "black", linetype=2) +
  geom_vline(aes(xintercept = 25.5), color = "black", linetype=2) +
  annotate("text", x=26.5, y=400, label="S", size=2.5)+
  theme_minimal()+
  labs(y = "Number of genomes", x="Disk zone (mm)", title="B) Disk zones, by genotype profile") +
  theme_minimal() + 
  theme(axis.text.x = element_text(size=11))+
  theme(legend.position = "none")
rules_category_SIR_plot <- cipro_antibiogram %>% 
  ggplot(aes(x=as.factor(rules_category), fill=factor(Resistance.phenotype, levels = c("S", "I", "R")))) + 
  geom_bar(position = "fill") + coord_flip() +
  scale_y_continuous(labels = scales::percent)+
  labs(title="C) Observed phenotype distribution, per genotype profile", x="Genotype profile", y="Proportion (%)", fill="Phenotype") +
  scale_fill_manual(values = res_colours)+
  scale_x_discrete(limits=rev, labels=profile_labels) +
  geom_text_repel(aes(label = after_stat(count)), stat = "count", size=3, direction="x", position=position_fill(0.4)) +
  theme_minimal()
  rules_category_SIR_plot

Figure S10 - genotype profiles vs measurements

rules_MIC_distribution / rules_DD_distribution / rules_category_SIR_plot

ggsave(height=10, width=8, file="figs/FigS10_genotypeProfiles_MIC_DD_distribution.png")

ggsave(height=10, width=8, file="figs/FigS10_genotypeProfiles_MIC_DD_distribution.pdf")

numbers for text - ATU discrepant isolates

dd_atu <- cipro_antibiogram %>% filter(Resistance.phenotype!="R" & predSIR=="R") %>% filter(Laboratory.Typing.Method == "Disk diffusion") %>% filter(as.numeric(Measurement) >21 & as.numeric(Measurement)<24) 

mic_atu <- cipro_antibiogram %>% filter(Resistance.phenotype!="R" & predSIR=="R") %>% filter(Laboratory.Typing.Method != "Disk diffusion") %>% filter(as.numeric(Measurement) ==0.5) 

atu <- bind_rows(mic_atu, mic_atu) 

atu %>% group_by(Flq_acquired, Flq_mutations) %>% count()  %>% mutate(pc=n/nrow(atu))

numbers for text - unexplained resistance

# number R with no res determinants
cipro_antibiogram %>% filter(Resistance.phenotype=="R" & QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% nrow()
## [1] 140
cipro_antibiogram %>% filter(Resistance.phenotype=="R") %>% nrow()
## [1] 6177
# assay measures for unexplained R
cipro_antibiogram %>% filter(Resistance.phenotype=="R" & QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% filter(Laboratory.Typing.Method!="Disk diffusion" & Measurement==1) %>% nrow()
## [1] 53
cipro_antibiogram %>% filter(Resistance.phenotype=="R" & QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% filter(Laboratory.Typing.Method=="Disk diffusion" & Measurement %in% c(20, 21)) %>% nrow()
## [1] 20
# STs for unexplained R
cipro_antibiogram %>% filter(Resistance.phenotype=="R" & QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% group_by(ST) %>% count()
cipro_antibiogram %>% filter(Resistance.phenotype=="R" & QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% group_by(ST) %>% count() %>% nrow()
## [1] 84
# porin mutations in all genomes without QRDR/PMQR/aac6
cipro_antibiogram %>% filter(QRDR_mutations==0 & acquired_genes==0& aac6==0 & Omp_mutations !="-") %>% nrow()
## [1] 94
cipro_antibiogram %>% filter(QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% nrow()
## [1] 5671
(cipro_antibiogram %>% filter(QRDR_mutations==0 & acquired_genes==0& aac6==0 & Omp_mutations !="-") %>% nrow()) / (cipro_antibiogram %>% filter(QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% nrow())
## [1] 0.01657556
cipro_antibiogram %>% filter(QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% group_by(Omp_mutations) %>% count()
cipro_antibiogram %>% filter(QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% filter(grepl('OmpK35', Omp_mutations))%>% nrow()
## [1] 45
cipro_antibiogram %>% filter(QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% filter(grepl('OmpK36', Omp_mutations))%>% nrow()
## [1] 53
# genomes without QRDR/PMQR/aac6
noResDet <- cipro_antibiogram %>% filter(QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% mutate(ompK35=if_else(grepl('OmpK35', Omp_mutations), 1, 0)) %>% mutate(ompK36=if_else(grepl('OmpK36', Omp_mutations), 1, 0))

fisher.test(noResDet$ompK35, noResDet$resistant)
## 
##  Fisher's Exact Test for Count Data
## 
## data:  noResDet$ompK35 and noResDet$resistant
## p-value = 6.934e-09
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##   6.15008 28.58310
## sample estimates:
## odds ratio 
##     13.761
prop.test(table(noResDet$ompK35,noResDet$resistant)[2:1,2:1])
## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  table(noResDet$ompK35, noResDet$resistant)[2:1, 2:1]
## X-squared = 82.013, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  0.08469031 0.35834006
## sample estimates:
##     prop 1     prop 2 
## 0.24444444 0.02292926
fisher.test(noResDet$ompK36, noResDet$resistant)
## 
##  Fisher's Exact Test for Count Data
## 
## data:  noResDet$ompK36 and noResDet$resistant
## p-value = 0.04102
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.8500308 9.1624131
## sample estimates:
## odds ratio 
##   3.289251
prop.test(table(noResDet$ompK36,noResDet$resistant)[2:1,2:1])
## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  table(noResDet$ompK36, noResDet$resistant)[2:1, 2:1]
## X-squared = 3.7993, df = 1, p-value = 0.05127
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.02948782  0.13201541
## sample estimates:
##    prop 1    prop 2 
## 0.0754717 0.0242079
# porin mutations in unexplained R

cipro_antibiogram %>% filter(Resistance.phenotype=="R" & Flq_acquired=="-" & Flq_mutations=="-" & aac6==0) %>% filter(Omp_mutations!="-") %>% nrow()
## [1] 14
cipro_antibiogram %>% filter(Resistance.phenotype=="R" & Flq_acquired=="-" & Flq_mutations=="-" & aac6==0) %>% filter(Omp_mutations!="-") %>% nrow()/ (cipro_antibiogram %>% filter(Resistance.phenotype=="R" & QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% nrow())
## [1] 0.1
cipro_antibiogram %>% filter(Resistance.phenotype=="R" & Flq_acquired=="-" & Flq_mutations=="-" & aac6==0) %>% group_by(Omp_mutations) %>% count()
# assay measures for unexplained R with porin mutations
cipro_antibiogram %>% filter(Resistance.phenotype=="R" & Flq_acquired=="-" & Flq_mutations=="-" & aac6==0) %>% group_by(Omp_mutations, Measurement, Measurement.units) %>% count() %>% filter(Omp_mutations!="-") %>% arrange(Measurement)

numbers for text - unexplained S

# number S with mutations
S_mut <- cipro_antibiogram %>% filter(Resistance.phenotype=="S" & ((QRDR_mutations==1 & Flq_mutations!="GyrA-87G"& Flq_mutations!="GyrA-87H") | (acquired_genes==1 & !grepl('qnrB1.', Flq_acquired)))) 

S_mut %>% nrow()
## [1] 41
# assay measures for unexplained R
S_mut %>% filter(Laboratory.Typing.Method!="Disk diffusion" & Measurement==0.25) %>% nrow()
## [1] 30
S_mut %>% filter(Laboratory.Typing.Method=="Disk diffusion" & Measurement %in% c(25, 26)) %>% nrow()
## [1] 4
S_mut %>% group_by(ST) %>% count()
S_mut %>% group_by(ST) %>% count()%>% nrow()
## [1] 33
S_mut %>% group_by(Flq_acquired, Flq_mutations) %>% count()
# assay measures for unexplained R with porin mutations
S_mut %>% group_by(Flq_acquired, Flq_mutations, Measurement, Measurement.units) %>% count() %>% arrange(Measurement)

MIC regression models

MIC_vs_dets <- cipro_antibiogram %>% 
  filter(Laboratory.Typing.Method!="Disk diffusion") %>%
  select(`GyrA-83F`:`qnrVC6`, aac6, Measurement) %>%
  rename(`aac(6')-Ib-cr`=aac6) %>%
  select(-c(ends_with('?'), ends_with('*'))) # remove imprecise matches

# exclude genomes with rare variants
MIC_vs_dets <- MIC_vs_dets[,colSums(MIC_vs_dets) >= 20]

# model MIC vs all common variants (N>=20) = Model 1a
model_MIC_indiv <- lm(log2(Measurement) ~ ., data=MIC_vs_dets)

# model R vs all common variants (N>=20), each with aac6 interaction = Model 1b
model_MIC_indiv_aac6Interaction <- lm(log2(Measurement) ~ .*`aac(6')-Ib-cr`, data=MIC_vs_dets)

Linear regression models

plot MIC linear regression coefficients for each determinant

# summarise model output
linreg_MIC_model1a <- linreg_details(model_MIC_indiv) %>% mutate(interaction=NA) %>% mutate(model="a") %>% relocate(interaction, .after=Determinant) %>%
    mutate(Determinant=gsub("`","",Determinant)) 
linreg_MIC_model1b <- linreg_details(model_MIC_indiv_aac6Interaction) %>% 
  rename(Term=Determinant) %>%
  separate_wider_delim(Term, ":", names=c("Determinant","interaction"), too_few="align_start", cols_remove=F) %>% 
  mutate(model="b") %>%
  mutate(Determinant=gsub("`","",Determinant))  %>%
  mutate(Term=gsub("`","",Term))  %>%
  mutate(interaction=gsub("`","",interaction)) 

# all individual determinants, plus significant positive interactions
lin_reg_MIC_plot <- linreg_MIC_model1a %>% bind_rows(linreg_MIC_model1b) %>%
    filter((pval<0.05 & est>0) | is.na(interaction)) %>%
    mutate(interaction=factor(replace(interaction, is.na(interaction), "none"), levels=c("none", "aac(6')-Ib-cr"))) %>%
    filter(Determinant != "(Intercept)") %>%
    ggplot(aes(y=Determinant)) + 
    geom_vline(xintercept=0) + 
    geom_linerange(aes(xmin=ci.lower, xmax=ci.upper, group=model, col=model, linetype=interaction), position=pd) +
    geom_point(aes(x=est, group=model, col=model), position=pd) + 
    scale_colour_manual(values=c(a="grey", b="black")) +
    theme_bw() + labs(x="Regression coefficient", col="Model", linetype="Interaction")

lin_reg_MIC_plot

disk diffusion linear regression

DD_vs_dets <- cipro_antibiogram %>% 
  filter(Laboratory.Typing.Method=="Disk diffusion") %>%
  select(`GyrA-83F`:`qnrVC6`, aac6, Measurement) %>%
  rename(`aac(6')-Ib-cr`=aac6) %>%
  select(-c(ends_with('?'), ends_with('*'))) # remove imprecise matches

# exclude genomes with rare variants
DD_vs_dets <- DD_vs_dets[,colSums(DD_vs_dets) >= 20]

# model MIC vs all common variants (N>=20) = Model 1a
model_DD_indiv <- lm(Measurement ~ ., data=DD_vs_dets)

# model R vs all common variants (N>=20), each with aac6 interaction = Model 1b
model_DD_indiv_aac6Interaction <- lm(Measurement ~ .*`aac(6')-Ib-cr`, data=DD_vs_dets)

plot MIC linear regression coefficients for each determinant

# summarise model output
linreg_DD_model1a <- linreg_details(model_DD_indiv) %>% mutate(interaction=NA) %>% mutate(model="a") %>% relocate(interaction, .after=Determinant) %>%
    mutate(Determinant=gsub("`","",Determinant)) 
linreg_DD_model1b <- linreg_details(model_DD_indiv_aac6Interaction) %>% 
  rename(Term=Determinant) %>%
  separate_wider_delim(Term, ":", names=c("Determinant","interaction"), too_few="align_start", cols_remove=F) %>% 
  mutate(model="b") %>%
  mutate(Determinant=gsub("`","",Determinant))  %>%
  mutate(Term=gsub("`","",Term))  %>%
  mutate(interaction=gsub("`","",interaction)) 

# all individual determinants, plus significant positive interactions
lin_reg_DD_plot <- linreg_DD_model1a %>% bind_rows(linreg_DD_model1b) %>%
    filter((pval<0.05 & est<0) | is.na(interaction)) %>%
    mutate(interaction=replace(interaction, is.na(interaction), "-")) %>%
    filter(Determinant != "(Intercept)") %>%
    ggplot(aes(y=Determinant)) + 
    geom_vline(xintercept=0) + 
    geom_linerange(aes(xmin=ci.lower, xmax=ci.upper, group=model, col=model, linetype=interaction), position=pd) +
    geom_point(aes(x=est, group=model, col=model), position=pd) + 
    scale_colour_manual(values=c(a="grey", b="black")) +
    theme_bw() + labs(x="Regression coefficient", col="Model", linetype="Interaction") + 
  theme(legend.position="none")

lin_reg_DD_plot

lin_reg_MIC_plot + ggtitle("MIC model") + lin_reg_DD_plot + ggtitle("Disk diffusion zone model") + plot_layout(axes="collect", guides="collect")

ggsave(width=8, height=5, file="figs/FigX_regressionMIC_DD_Models.pdf")
ggsave(width=8, height=5, file="figs/FigX_regressionMIC_DD_Models.png")